library(ggplot2)
library(cowplot)
library(dplyr)
library(tidyr)
library(ggsci)
library(ggpubr)
library(MetBrewer)
library(survival)
library(survminer)
library(cmprsk)
library(RColorBrewer)
# IPSS-M package
library(ipssm)
# Clinical data
ddc <- read.table("./data/df_clinical.tsv", header=T, sep="\t", stringsAsFactors = F)
# CNA binary data
ddcyto <- read.table("./data/df_cna.tsv", header=T, sep="\t", stringsAsFactors = F)
# Mutation binary data
ddmut <- read.table("./data/df_mut.tsv", header=T, sep="\t", stringsAsFactors = F)
# Mutation in MAF format (i.e. one line per mutation per patient)
maf <- read.table("./data/maf.tsv", header=T, sep="\t", stringsAsFactors = F)
# Clinical data
colnames(ddc)
nrow(ddc)
# Mutation binary data
colnames(ddmut)
# CNA binary data
colnames(ddcyto)
# Mutation in MAF format
head(maf,1)
nrow(maf)
# Master Dataframe
dd <- left_join(ddc, ddmut, by="ID")
dd <- left_join(dd, ddcyto, by="ID")
# Some Plotting Utils
gtheme <- function(size=18) {
theme(text=element_text(size=size,colour="black"),
axis.text=element_text(size=size,colour="black"),
axis.title=element_text(size=size,colour="black"),
strip.text.x = element_text(size=size),
legend.text=element_text(size=size))
}
noleg = theme(legend.position = "none")
topleg = theme(legend.position = "top")
noxtitle = theme(axis.title.x = element_blank())
noytitle = theme(axis.title.y = element_blank())
noxlabel = theme(axis.text.x = element_blank())
noylabel = theme(axis.text.y = element_blank())
nolegtitle = theme(legend.title = element_blank())
# Levels
dd$IPSSR <- factor(dd$IPSSR, levels=c("Very-Low","Low","Int","High","Very-High"))
paste("Number of patients is:", nrow(dd))
table(dd$WHO_2016, exclude=F)
table(dd$IPSSR)
options(repr.plot.width=15, repr.plot.height=2)
dd$who <- dd$WHO_2016
dd$who[dd$who %in% c("MDS-SLD","MDS-MLD")] <- "MDS-SLD/MLD"
dd$who[dd$who %in% c("MDS-RS-SLD","MDS-RS-MLD")] <- "MDS-RS-SLD/MLD"
dd$who[dd$who %in% c("MDS-EB1","MDS-EB2")] <- "MDS-EB1/2"
dd$who[dd$WHO_2016=="aCML"] <- "other"
dd$who <- factor(as.vector(dd$who),levels=c("MDS-del5q",
"MDS-RS-SLD/MLD","MDS-SLD/MLD",
"MDS-EB1/2","MDS-U","CMML","MDS/MPN-RS-T",
"MDS/MPN-U","other"))
gswho <- ggplot(dd, aes(x="", fill=who)) +
geom_bar(position="fill",color="black") + theme_classic() + gtheme(18) +
xlab("") + ylab("%") +
scale_y_continuous(breaks=c(0,.25,.50,.75,1),labels=c(0,25,50,75,100)) +
scale_fill_uchicago(name="WHO", labels=c(levels(dd$who),"missing"), na.value="grey65") +
theme(axis.line.y=element_blank(), axis.ticks.y=element_blank(),axis.text.y=element_blank()) +
coord_flip() + topleg
col.ipssr = c("#238b45","#66c2a4","#807dba","#fdbb84","#ef6548")
gsipss <- ggplot(dd, aes(x="", fill=IPSSR)) +
geom_bar(position="fill",color="black") + theme_classic() + gtheme(18) +
xlab("") + ylab("%") +
scale_y_continuous(breaks=c(0,.25,.50,.75,1),labels=c(0,25,50,75,100)) +
scale_fill_manual(values=col.ipssr,name="IPSS-R Category",
labels=c("Very Low","Low","Int","High","Very High","missing"),
na.value="grey65"
) +
theme(axis.line.y=element_blank(), axis.ticks.y=element_blank(),axis.text.y=element_blank()) +
coord_flip() + topleg
options(repr.plot.width=15, repr.plot.height=4)
giw <- ggarrange(gsipss + theme(axis.line.x=element_blank(), axis.ticks.x=element_blank(),axis.text.x=element_blank())+ylab(""),
NULL,
gswho + theme(legend.position="bottom"),
heights=c(1,-0.2,1.12),
ncol=1)
giw
options(warn=-1)
gage <- ggplot(dd, aes(x="",y=AGE)) +
geom_boxplot(outlier.shape=NA, fill="grey65") +
theme_classic() + gtheme(18)+ noleg +
coord_trans(ylim=c(40,100)) +
ylab("Age in years") + xlab("") +
theme(axis.line.x=element_blank(), axis.ticks.x=element_blank(),axis.text.x=element_blank())
ghb <- ggplot(dd, aes(x="",y=HB)) +
geom_boxplot(outlier.shape=NA, fill="grey65") +
theme_classic() + gtheme(18) + noleg +
coord_trans(ylim=c(4,16)) +
ylab("Hemoglobin in g/dL") + xlab("") +
theme(axis.line.x=element_blank(), axis.ticks.x=element_blank(),axis.text.x=element_blank())
gplt <- ggplot(dd, aes(x="",y=PLT)) +
geom_boxplot(outlier.shape=NA, fill="grey65") +
theme_classic() + gtheme(18)+ noleg +
coord_trans(ylim=c(0,500)) +
ylab("Platelets in Giga/L") + xlab("") +
theme(axis.line.x=element_blank(), axis.ticks.x=element_blank(),axis.text.x=element_blank())
ganc <- ggplot(dd, aes(x="",y=ANC)) +
geom_boxplot(outlier.shape=NA, fill="grey65") +
theme_classic() + gtheme(18) + noleg +
coord_trans(ylim=c(0,7)) +
ylab("ANC in Giga/L") + xlab("") +
theme(axis.line.x=element_blank(), axis.ticks.x=element_blank(),axis.text.x=element_blank())
gblast <- ggplot(dd, aes(x="",y=BM_BLAST)) +
geom_boxplot(outlier.shape=NA, fill="grey65") +
theme_classic() + gtheme(18) + noleg +
stat_compare_means(label.y=20,comparisons=list(c(1,2)),label="p.signif",tip.length=0) +
coord_trans(ylim=c(0,20)) +
ylab("Bone marrow blasts in %") + xlab("") +
theme(axis.line.x=element_blank(), axis.ticks.x=element_blank(),axis.text.x=element_blank())
options(repr.plot.width=15, repr.plot.height=4)
gclinical <- ggarrange(gage, gblast, ghb, gplt, ganc, ncol=5)
gclinical
# Function
mutation_effect <- function(vec_vag_effect) {
# category
truncated = c("frameshift_variant","complex_change_in_transcript","initiator_codon_change","splice_site_variant","stop_gained","stop_lost","stop_retained_variant","synonymous_codon")
# note that the synonymous_codon in the dataset affects TP53
# ie p.T125T and have been shown to affect splicing and be truncated-like.
inframe_indel = c("inframe_codon_gain","inframe_codon_loss","inframe_variant")
missense = c("non_synonymous_codon","synonymous_codon")
missense = c(missense,"2KB_upstream_variant","3_prime_UTR_variant")
# 2KB_upstream_variant = TERT PROMOTER
# 3_prime_UTR_variant = TET2 UTR
itdptd = c("ITD","PTD")
# res
res = rep(NA, length(vec_vag_effect))
res[vec_vag_effect %in% truncated] = "truncated"
res[vec_vag_effect %in% inframe_indel] = "inframe-indel"
res[vec_vag_effect %in% missense] = "missense"
res[vec_vag_effect %in% "ITD"] = "itd"
res[vec_vag_effect %in% "PTD"] = "ptd"
res[vec_vag_effect %in% "2KB_upstream_variant"] = "promoter"
#res[vec_vag_effect %in% "3_prime_UTR_variant"] = "utr"
res[vec_vag_effect %in% "3_prime_UTR_variant"] = "missense"
return(res)
}
CreateCombineMaf <- function(ddmaf, ddcyto, complex.vector) {
mymaf = ddmaf
# simplified effect
mymaf$Consequence = mutation_effect(mymaf$EFFECT)
# multiple hits cases
mymaf$mkey = paste(mymaf$ID, mymaf$GENE, sep="_")
im = which(duplicated(mymaf$mkey))
am = unique(mymaf$mkey[im])
mymaf[mymaf$mkey %in% am,"Consequence"] = "multiple-mutations"
mymaf = mymaf[-im,]
rm(im,am)
mymaf = mymaf[,c("ID","GENE","PROTEIN_CHANGE","Consequence")]
mymaf$Type = "Gene"
colnames(mymaf) = c("ID","Identity","Change","Consequence","Type")
# ADD CYTO TO MAF
tmp = ddcyto[,-1]
tmp$complex = 0
tmp$complex[which(complex.vector=="complex")] = 1
rownames(tmp) = ddcyto[,1]
resf = lapply(rownames(tmp), function(x) {
ia = which(tmp[rownames(tmp)==x,]==1)
if (length(ia)>0) {
ca = colnames(tmp)[ia]
return(data.frame(ID=rep(x,length(ca)),
Identity=ca, Change=rep("chromosomal",length(ca)),Consequence=rep("chromosomal",length(ca)),Type="Chromosome"))
} else {
return(NULL)
}
})
mymaf = rbind(mymaf, do.call("rbind",resf))
mymaf$Consequence2 = mymaf$Consequence
mymaf$Consequence2[mymaf$Type=="Chromosome" & (grepl("del",mymaf$Alteration)|grepl("plus",mymaf$Alteration))] = "copy-number-change"
mymaf$Consequence2[mymaf$Type=="Chromosome" & grepl("upd",mymaf$Alteration)] = "copy-neutral-loh"
mymaf$Consequence2[mymaf$Type=="Chromosome" & grepl("r_",mymaf$Alteration)] = "rearrangement"
mymaf$Identity = factor(mymaf$Identity, levels=rev(names(sort(table(mymaf$Identity)))))
mymaf$Consequence = factor(mymaf$Consequence, levels=c("missense","inframe-indel","truncated","multiple-mutations","itd","ptd","promoter","utr","chromosomal"))
mymaf$Consequence2 = factor(mymaf$Consequence2, levels=c("missense","inframe-indel","truncated","multiple-mutations","itd","ptd","promoter","utr","copy-number-change","copy-neutral-loh","rearrangement","chromosomal"))
return(mymaf)
}
mymaf = CreateCombineMaf(maf, ddcyto,complex.vector=dd$complex)
allgenes <- colnames(ddmut)[2:127]
allcyto <- colnames(ddcyto)[-1]
allcyto <- allcyto[!allcyto %in% "ring"]
gogenes <- unique(c(allgenes[apply(dd[,allgenes],2,sum)>=10],"MLL","FLT3"))
gocyto <- c("complex",allcyto[apply(dd[dd$complex=="non-complex",allcyto],2,sum)>=10])
tmp <- mymaf
tmp$Consequence <- as.vector(tmp$Consequence)
tmp$Consequence[tmp$Consequence%in%c("itd","ptd")] <- "tandem duplication"
tmp$Consequence <- factor(tmp$Consequence, levels=unique(gsub("ptd","tandem duplication",gsub("itd","tandem duplication",levels(mymaf$Consequence)))))
tmp <- tmp[tmp$Identity %in% c(gogenes,gocyto),]
tmp <- mymaf
tmp$Consequence <- as.vector(tmp$Consequence)
tmp$Consequence[tmp$Consequence%in%c("itd","ptd")] <- "tandem duplication"
tmp$Consequence <- factor(tmp$Consequence, levels=unique(gsub("ptd","tandem duplication",gsub("itd","tandem duplication",levels(mymaf$Consequence)))))
tmp <- tmp[tmp$Identity %in% c(gogenes,gocyto),]
lc1 <- levels(tmp$Identity)[levels(tmp$Identity)%in%c(gogenes,gocyto)]
lc11 <- lc1
lc11 <- gsub("del7q","del(7q)",gsub("delY","-Y",gsub("plus","+",gsub("del5q","del(5q)",lc11))))
lc11 <- gsub("del11q","del(11q)",gsub("del7","-7",gsub("del20q","del(20q)",lc11)))
col.fig1a <- met.brewer("Derain")
col.fig1a <- c( col.fig1a[c(2,3,4,6,1)] , "grey85" , col.fig1a[c(7)] )
options(repr.plot.width=35, repr.plot.height=11)
ggplot(tmp) +
geom_bar(aes(x=Identity,y=100*(..count../nrow(dd)),fill=Consequence), color="black", size=.8, width=.84) +
theme_classic() + gtheme(26) +
topleg + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
noxtitle + ylab("% of patients") +
scale_fill_manual( values=col.fig1a , name="") +
scale_x_discrete(labels=lc11)
dd$num_mut <- sapply(dd$ID, function(x) sum(maf$ID==x))
dd$num_cyto <- apply(dd[,allcyto],1,sum)
dd$num_driver <- dd$num_mut + dd$num_cyto
summary(dd$num_mut)
summary(dd$num_driver)
cc <- 8
dd$range_driver <- dd$num_driver
dd$range_driver[dd$num_driver>cc] <- paste0(">",cc)
dd$range_driver <- factor(dd$range_driver, levels=c(0:cc,paste0(">",cc)))
dd$onecase = sapply(1:nrow(dd), function(i) {
x = dd$num_driver[i]
res = NA
p = dd$ID[i]
if (x==1) {
res = as.vector(mymaf[mymaf$ID==p,"Identity"])
}
return(res)
})
aa = dd
a = names(sort(table(dd$onecase),decreasing=T)[1:8])
aa$label = NA
aa$label[!is.na(aa$onecase)] = "other"
aa$label[which(aa$onecase %in% a)] = aa$onecase[which(aa$onecase %in% a)]
aa$label = factor(aa$label, levels=c(a,"other"))
barlev <- gsub("del20q","del(20q)",gsub("delY","-Y",gsub("del5q","del(5q)",levels(aa$label))))
demuth <- met.brewer("Demuth")
col.fig1b <- c(demuth[1:5],demuth[c(10,9,8,7)])
gh0 <- ggplot(aa[!is.na(aa$label),], aes(x=range_driver,fill=label,y=100*..count../sum(..count..))) +
geom_bar(color="black") +
theme_classic() + gtheme(22) + ylab("% of patients") +
scale_fill_manual(values=col.fig1b,
labels=c(barlev,"NA"),
na.value=demuth[6]) +
nolegtitle + xlab("Number of driver events") +
guides(fill=guide_legend(ncol=2,byrow=TRUE)) +
theme(legend.position = c(0.8, 0.8)) + nolegtitle
legendh0 <- get_legend(gh0)
histleg <- ggarrange(legendh0)
#histleg
gh <- ggplot(aa, aes(x=range_driver,fill=label,y=100*..count../sum(..count..))) +
geom_bar(color="black") +
theme_classic() + gtheme(22) + ylab("% of patients") +
scale_fill_manual(values=col.fig1b,
labels=c(barlev,"lala"),
na.value=demuth[6]) +
noleg + xlab("Number of driver events")
gb <- ggplot(aa, aes(x=range_driver,y=BM_BLAST)) +
geom_violin(color="black",draw_quantiles = .5,fill=demuth[6]) +
theme_classic() + gtheme(22) + ylab("Bone marrow blasts (%)") +
#stat_compare_means() +
annotate("text",label=paste0("p"," < ","0.0001"),y=19,x=1.2,size=7) +
nolegtitle + noxtitle +
#scale_y_continuous(breaks=c(0,-5,-10,-15,-20),labels=c(0,5,10,15,20)) +
theme(axis.line.x=element_blank(), axis.ticks.x=element_blank(),axis.text.x=element_blank())
ghh <- ggdraw() +
draw_plot(gh + noleg) +
draw_plot(histleg, x = .58, y = .5, width = .3, height = .4)
options(repr.plot.width=9.5, repr.plot.height=10)
gtwo <- ggarrange(ghh, gb, ncol=1, heights=c(2,1.1))
gtwo
cc <- 6
aa$range_driver3 <- as.vector(aa$num_driver)
aa$range_driver3[aa$num_driver>cc] <- paste0(">",cc)
aa$range_driver3[aa$num_driver%in%c(1,2)] <- "1-2"
aa$range_driver3[aa$num_driver%in%c(3,4)] <- "3-4"
aa$range_driver3[aa$num_driver%in%c(5,6)] <- "5-6"
aa$range_driver3[aa$label=="SF3B1"] <- "isolated SF3B1"
aa$range_driver3 <- factor(aa$range_driver3, levels=c("0","isolated SF3B1","1-2","3-4","5-6",paste0(">",cc)))
nncol <- c(
"grey55",
"#3690c0",
"#ccece6",
"#66c2a4",
"#238b45",
"#00441b"
)
table(aa$range_driver3 , exclude=F)
# LFS
look <- "range_driver3"
ff.free = as.formula(paste("Surv(LFS_YEARS,LFS_STATUS)~",look))
km.free = survfit(ff.free ,data=aa) ; km.free$call$formula <- ff.free
myleg.free = paste0(gsub(paste0(look,"="),"",names(km.free$strata)))
gall.free <- ggsurvplot(km.free, data=aa,
color=look,
legend.labs=myleg.free,
legend.title="Num. drivers",
size=1.1, conf.int=F,
xlim=c(0,10), break.time.by=1,
pval=T, pval.coord=c(8.5, 0.99), pval.size=7,
#surv.median.line="hv",
palette=nncol,
risk.table=TRUE,
#tables.col="look",
tables.y.text=FALSE,
risk.table.title="No. at risk",risk.table.fontsize=5
)
gfree <- gall.free$plot + theme_classic() + gtheme(22) + topleg +
theme(legend.key.width = unit(1.1,"cm")) + xlab("Years") + ylab("LFS probability") +
guides(color = guide_legend(override.aes = list(shape = NA))) +
guides(color=guide_legend(nrow=3)) +
theme(legend.key.size = unit(1.7, 'lines'))
gtable.free <- gall.free$table + gtheme(18) +
theme(axis.line=element_blank(), axis.ticks.x=element_blank(),axis.text.x=element_blank()) +
noxtitle + noytitle + noleg
options(repr.plot.width=10, repr.plot.height=10)
gfree2 <- ggarrange(gfree, gtable.free+theme(plot.margin = unit(c(-0.5, 0, 0, 1.6), "cm")),
ncol=1, nrow=2, heights=c(5,1))
gfree2
The IPSS-M is already provided on the dataframe, with columns:
However, in this section, we re-calculate it, in order to illustrate how to do it!
path.file <- system.file("extdata", "IPSSMexample.csv", package = "ipssm")
lala <- IPSSMread(path.file)
colnames(lala)
# Preparing Input Data
tmp.ipssm <- dd
tmp.ipssm$del7_7q <- as.numeric(tmp.ipssm$del7 | tmp.ipssm$del7q)
tmp.ipssm$del17_17p <- as.numeric(tmp.ipssm$del17 | tmp.ipssm$del17p)
tmp.ipssm$complex <- car::recode(tmp.ipssm$complex, "'complex'='1'; 'non-complex'='0'")
tmp.ipssm$CYTO_IPSSR <- car::recode(tmp.ipssm$CYTO_IPSSR , "'Very-Good'='Very Good' ; 'Int'='Intermediate' ; 'Very-Poor'='Very Poor'")
tmp.ipssm$TP53mut <- as.vector(sapply(dd$ID, function(x) sum(maf$ID==x & maf$GENE=="TP53")))
tmp.ipssm$TP53mut <- car::recode(tmp.ipssm$TP53mut, " '2'='2 or more' ; '3'='2 or more' ")
tmp.ipssm$TP53loh <- 0 ; tmp.ipssm$TP53loh[which(tmp.ipssm$chr17%in%c("cnloh","del"))] <- 1
tmp.ipssm$TP53maxvaf <- NA
tmp.ipssm$FLT3[which(tmp.ipssm$FLT3_ITD==1)] <- 1
tmp.ipssm <- tmp.ipssm[,colnames(lala)]
# Processing Inout
tmp.process <- IPSSMprocess(tmp.ipssm)
tmp.process[which(tmp.process$ID=="E-H-106249-T1-1-D1-1"),"TP53multi"] <- 0 # just one exeption - erverting back as del17p not at the TP53 locus
# Calculating the IPSSM
tmp.res <- IPSSMmain(tmp.process)
table(tmp.res$IPSSMcat_mean, dd$IPSSM) # we re-calculated the IPSS-M indeed
dd$SF3B1_5q <- tmp.res$SF3B1_5q
dd$SF3B1_alpha <- tmp.res$SF3B1_alpha
dd$IPSSM <- factor(dd$IPSSM, levels=c("Very-Low","Low","Moderate-Low","Moderate-High","High","Very-High"))
table(dd$IPSSM, exclude=F)
summary(dd$IPSSM_SCORE)
gcol <- rev(pal_jama()(5))
gcol <- gcol[c(1,5,3,4,2)]
ggcol <- c(gcol[1:3],pal_jama()(6)[6],gcol[4:5])
tmp = dd[!is.na(dd$IPSSM_SCORE),]
dens <- density(tmp$IPSSM_SCORE)
dens <- data.frame(x=dens$x, y=dens$y)
options(repr.plot.width=12, repr.plot.height=7)
cutpoints.center <- c(-1.5,-0.5,0,0.5,1.5)
gscore <- ggplot(dens, aes(x=x,y=y)) +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_rect(colour = "black", size=1)) +
gtheme(20) +
# highlight the cutoff for categorization
geom_area(mapping = aes(x = x, y=ifelse( (x<=cutpoints.center[1]) , y, 0)), fill = ggcol[1], alpha=.6)+
geom_area(mapping = aes(x = x, y=ifelse( (x>cutpoints.center[1]&x<=cutpoints.center[2]) , y, 0)), fill = ggcol[2], alpha=.6)+
geom_area(mapping = aes(x = x, y=ifelse( (x>cutpoints.center[2]&x<=cutpoints.center[3]) , y, 0)), fill = ggcol[3], alpha=.6)+
geom_area(mapping = aes(x = x, y=ifelse( (x>cutpoints.center[3]&x<=cutpoints.center[4]) , y, 0)), fill = ggcol[4], alpha=.6)+
geom_area(mapping = aes(x = x, y=ifelse( (x>cutpoints.center[4]&x<=cutpoints.center[5]) , y, 0)), fill = ggcol[5], alpha=.6)+
geom_area(mapping = aes(x = x, y=ifelse( (x>cutpoints.center[5]) , y, 0)), fill = ggcol[6], alpha=.6) +
geom_vline(xintercept = cutpoints.center[-3],col="black",linetype=2,size=1) +
geom_vline(xintercept = cutpoints.center[3],col="black",linetype=2,size=.5) +
# plot density
geom_line(size=3) +
# scaling
scale_x_continuous(breaks=c(-2,-1,0,1,2,3,4),
limits = c(-3,4),
sec.axis = dup_axis(name="Hazard ratio (from average patient)",
labels=c(2^-2,2^-1,2^0,2^1,2^2,2^3,2^4))
) +
# labels
xlab("IPSS-M risk score") + ylab("Density") + ylim(c(0,.37)) +
# annotate category
annotate(geom="text",x=-2,y=.37,label=" VL ",size=8,col=ggcol[1],fontface=2) +
annotate(geom="text",x=-2,y=.345,label="14%",size=5,col=ggcol[1]) +
annotate(geom="text",x=-1,y=.37,label=" L ",size=8,col=ggcol[2],fontface=2) +
annotate(geom="text",x=-1,y=.345,label="33%",size=5,col=ggcol[2]) +
annotate(geom="text",x=-0.25,y=.37,label=" ML ",size=8,col=ggcol[3],fontface=2) +
annotate(geom="text",x=-0.25,y=.345,label="11%",size=5,col=ggcol[3]) +
annotate(geom="text",x=0.25,y=.37,label=" MH ",size=8,col=ggcol[4],fontface=2) +
annotate(geom="text",x=0.25,y=.345,label="11%",size=5,col=ggcol[4]) +
annotate(geom="text",x=1,y=.37,label=" H ",size=8,col=ggcol[5],fontface=2) +
annotate(geom="text",x=1,y=.345,label="14%",size=5,col=ggcol[5]) +
annotate(geom="text",x=2,y=.37,label=" VH ",size=8,col=ggcol[6],fontface=2) +
annotate(geom="text",x=2,y=.345,label="17%",size=5,col=ggcol[6]) +
# annotate interpretation
geom_segment(
x = 0.5, y = 0.05,
xend = 1.5, yend = 0.05,
lineend = "round",linejoin = "round",
size = 1,
arrow = arrow(length = unit(0.1, "inches")),
colour = "black"
) +
geom_segment(
x = -0.5, y = 0.05,
xend = -1.5, yend = 0.05,
lineend = "round",linejoin = "round",
size = 1,
arrow = arrow(length = unit(0.1, "inches")),
colour = "black"
) +
annotate(geom="text",x=0,y=.04,label="average \n patient",size=6,col="black", fontface=2) +
annotate(geom="text",x=.65,y=.08,label=c(expression(Delta),""),size=8,col="black")+
annotate(geom="text",x=1,y=.08,label="score=1",size=6,col="black")+
annotate(geom="text",x=1,y=.06,label="double risk",size=4,col="black")+
annotate(geom="text",x=-1.37,y=.08,label=c(expression(Delta),""),size=8,col="black")+
annotate(geom="text",x=-1,y=.08,label="score=-1",size=6,col="black")+
annotate(geom="text",x=-1,y=.06,label="half risk",size=4,col="black")
gscore
rr <- dd
rr$risk.cat <- factor(rr$IPSSM, levels=c("Low","Very-Low",levels(dd$IPSSM)[3:6])) # make Low the reference level
ff = as.formula(paste("Surv(LFS_YEARS,LFS_STATUS)~","risk.cat"))
c.iwg <- coxph(ff, data=rr) ; c.iwg.free <- c.iwg
zc <- as.data.frame( summary(c.iwg)$conf.int )[,c(1,3,4)] ; colnames(zc) <- c("HR","HRlow95","HRup95")
zc <- rbind(data.frame(HR=1,HRlow95=0,HRup95=0),zc)
zc$category <- levels(rr$risk.cat)
zc$category <- factor(zc$category, levels=levels(dd$IPSSM))
zc$endpoint <- "Leukemia-free survival"
options(repr.plot.width=6, repr.plot.height=7.6)
gvf <- ggplot(zc, aes(x=category,y=HR, ymin=HRlow95,ymax=HRup95,color=category)) +
geom_hline(yintercept=1,color="darkgrey",size=.4) +
geom_pointrange(size=1.5,position = position_dodge(width=.6),shape=18) +
scale_y_log10(breaks=c(0.5,1,3,10,30,100),labels=c(0.5,1,3,10,30,100)) +
scale_color_manual(values=ggcol) + scale_fill_manual(values=ggcol) +
theme_light() + gtheme(18) +
ylab("Hazard ratio for transformation or death (95% CI)") + xlab("IPSS-M") +
noleg +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
annotate("text",label="reference",x="Low",y=1.15,col=gcol[2],size=5)
gvf
## KM CURVES
# LFS
ff.free = as.formula(paste("Surv(LFS_YEARS,LFS_STATUS)~","IPSSM"))
km.free = survfit(ff.free ,data=dd) ; km.free$call$formula <- ff.free
myleg.free = paste0(gsub("IPSSM=","",names(km.free$strata))," (",km.free$n,")")
# OS
ff.os = as.formula(paste("Surv(OS_YEARS,OS_STATUS)~","IPSSM"))
km.os = survfit(ff.os ,data=dd) ; km.os$call$formula <- ff.os
myleg.os = paste0(gsub("IPSSM=","",names(km.os$strata))," (",km.os$n,")")
# LFS
gall.free <- ggsurvplot(km.free, data=dd,
color="IPSSM",
#legend.labs=myleg.free,
legend.title="IPSS-M",
size=1.1, conf.int=F,
xlim=c(0,10), break.time.by=1,
pval=T, pval.coord=c(8.5, 0.99), pval.size=5,
surv.median.line="hv",
palette=ggcol,
risk.table=TRUE,
tables.col="IPSSM", tables.y.text=FALSE,
risk.table.title="No. at risk",risk.table.fontsize=5,
title="Leukemia-free survival"
)
gfree <- gall.free$plot + theme_classic() + gtheme(20) + topleg +
theme(legend.key.width = unit(1.1,"cm")) + xlab("Years") + ylab("Probability") +
guides(color = guide_legend(override.aes = list(shape = NA)))
gtable.free <- gall.free$table + gtheme(18) +
theme(axis.line=element_blank(), axis.ticks.x=element_blank(),axis.text.x=element_blank()) +
noxtitle + noytitle + noleg
ggfree <- ggarrange(gfree+noleg, gtable.free+theme(plot.margin = unit(c(-0.5, 0, 0, 1.5), "cm")),
ncol=1, nrow=2, heights=c(5,1))
options(repr.plot.width=9.5, repr.plot.height=10)
ggfree
# OS
gall.os <- ggsurvplot(km.os, data=dd,
color="IPSSM",
#legend.labs=myleg.free,
legend.title="IPSS-M",
size=1.1, conf.int=F,
xlim=c(0,10), break.time.by=1,
pval=T, pval.coord=c(8.5, 0.99), pval.size=5,
surv.median.line="hv",
palette=ggcol,
risk.table=TRUE,
tables.col="IPSSM", tables.y.text=FALSE,
risk.table.title="No. at risk",risk.table.fontsize=5,
title="Overall survival"
)
gos <- gall.os$plot + theme_classic() + gtheme(20) + topleg +
theme(legend.key.width = unit(1.1,"cm")) + xlab("Years") + ylab("Probability") +
guides(color = guide_legend(override.aes = list(shape = NA)))
gtable.os <- gall.os$table + gtheme(18) +
theme(axis.line=element_blank(), axis.ticks.x=element_blank(),axis.text.x=element_blank()) +
noxtitle + noytitle + noleg
ggos <- ggarrange(gos+noleg, gtable.os+theme(plot.margin = unit(c(-0.5, 0, 0, 1.5), "cm")),
ncol=1, nrow=2, heights=c(5,1))
options(repr.plot.width=9.5, repr.plot.height=10)
ggos
# Prepare for Competing Risk Analysis:
dd$comp_sample_years = pmin(dd$OS_YEARS, dd$AMLt_YEARS)
dd$comp_status = "censor"
dd$comp_status[which(dd$OS_STATUS==1)] = "death"
dd$comp_status[which(dd$AMLt_STATUS==1)] = "aml"
i <- which(is.na(dd$comp_sample_years) & !is.na(dd$comp_status))
dd$comp_status[i] <- NA
dd$comp_status = factor(dd$comp_status, levels=c("censor","aml","death"))
table(dd$comp_status,exclude=F)
get_df_cif <- function(cmfit, vec.time=seq(0,6,0.05)) {
fit <- timepoints(cmfit, vec.time)
# estimates
t.est <- as.data.frame(t(fit$est))
t.est$time <- as.numeric(rownames(t.est))
u.est.aml <- pivot_longer(data=t.est, cols=contains("aml"), names_to = "strata", values_to = "est_aml",)
u.est.death <- pivot_longer(data=t.est, cols=contains("death"), names_to = "strata", values_to = "est_death",)
# variance
t.var <- as.data.frame(t(fit$var))
t.var$time <- as.numeric(rownames(t.var))
u.var.aml <- pivot_longer(data=t.var, cols=contains("aml"), names_to = "strata", values_to = "var_aml",)
u.var.death <- pivot_longer(data=t.var, cols=contains("death"), names_to = "strata", values_to = "var_death",)
# build df
df <- cbind( u.est.aml[,c("time","est_aml","strata")] , u.est.death[,c("est_death")] ,
u.var.aml[,c("var_aml")] , u.var.death[,c("var_death")]
)
df$strata <- gsub(" death","",gsub(" aml","",df$strata) )
df$lower_aml <- df$est_aml - 1.96*sqrt(df$var_aml)
df$upper_aml <- df$est_aml + 1.96*sqrt(df$var_aml)
df$lower_death <- df$est_death - 1.96*sqrt(df$var_death)
df$upper_death <- df$est_death + 1.96*sqrt(df$var_death)
return(df)
}
# CAUSE DEATH WITH AML || DEATH WITHOUT AML
tmp <- dd
# CAUSE SPECIFIC: AML-t || DEATH WITHOUT AML
tmp$status1 <- car::recode(as.vector(tmp$comp_status),"'death'='death_without'")
tmp$time1 <- tmp$comp_sample_years
# CAUSE SPECIFIC: DEATH WITH AML || DEATH WITHOUT AML
tmp$status2 <- "censor"
tmp$status2[tmp$OS_STATUS==1 & tmp$AMLt_STATUS==1] <- "d_with_aml" # death with aml
tmp$status2[tmp$OS_STATUS==1 & tmp$AMLt_STATUS==0] <- "death_without" # death without aml
tmp$time2 <- tmp$OS_YEARS
i <- which( is.na(tmp$AMLt_YEARS) | is.na(tmp$AMLt_STATUS) )
tmp$time2[i] <- NA ; tmp$status2[i] <- NA
tmp$status2[which(is.na(tmp$time2))] <- NA
table(tmp$status1, tmp$status2,exclude=F)
cmfit1 = with(tmp,cuminc(time1,status1,IPSSM,cencode="censor"))
rr1 = get_df_cif(cmfit=cmfit1, vec.time = seq(0, 10, 0.05))
colnames(rr1) <- gsub("death","death_without",colnames(rr1))
cmfit2 = with(tmp,cuminc(time2,status2,IPSSM,cencode="censor"))
rr2 = get_df_cif(cmfit=cmfit2, vec.time = seq(0, 10, 0.05))
colnames(rr2) <- gsub("death","death_without",colnames(rr2))
colnames(rr2) <- gsub("aml","death_with",colnames(rr2))
rr3 <- cbind(rr1, rr2[,colnames(rr2)[!grepl("out",colnames(rr2))&grepl("death_with",colnames(rr2))]])
rr3$est_minus_os <- rr3$est_death_with + rr3$est_death_without
rr3$est_minus_lfs <- rr3$est_aml + rr3$est_death_without
rr3.long <- pivot_longer(rr3[,c("time","strata",colnames(rr3)[grepl("est",colnames(rr3))])],
cols = starts_with("est"))
rr3.long$strata <- factor(rr3.long$strata, levels=levels(tmp$IPSSM))
rr3.long$name <- factor(rr3.long$name,
levels=c("est_aml","est_death_with","est_death_without","est_minus_os","est_minus_lfs"))
casatt <- met.brewer("Cassatt2")
col.fig2e <- c(casatt[1],casatt[3],casatt[7],casatt[8],casatt[10])
options(repr.plot.width=27, repr.plot.height=6)
gcif2e <- ggplot(data = rr3.long, aes(x = time, y = value, linetype = name, color=name)) +
geom_line(size=1.4) +
scale_x_continuous(breaks=0:10) + ylim(c(0,1)) +
theme_classic() + gtheme(24) + topleg +
xlab("Years") + ylab("Cumulative incidence") + facet_grid(.~strata) +
scale_color_manual(values=col.fig2e,
name="cause",
labels=c("AML transformation","Death with AML","Death without AML",
"Death (1 - OS)","Death or AML transformation (1-LFS)")) +
scale_linetype_manual(values=c("longdash","dotdash","dotted"
,"solid","dashed"),
name="cause",
labels=c("AML transformation","Death with AML","Death without AML",
"Death (1 - OS)","Death or AML transformation (1-LFS)")) +
theme(legend.key.width = unit(3.4,"cm"))
gcif2e
g <- ggplot_gtable(ggplot_build(gcif2e))
strips <- which(grepl('strip-', g$layout$name))
pal <- brewer.pal(8, "Paired")
for (i in seq_along(strips)) {
k <- which(grepl('rect', g$grobs[[strips[i]]]$grobs[[1]]$childrenOrder))
#l <- which(grepl('titleGrob', g$grobs[[strips[i]]]$grobs[[1]]$childrenOrder))
g$grobs[[strips[i]]]$grobs[[1]]$children[[k]]$gp$fill <- ggcol[i]
g$grobs[[strips[i]]]$grobs[[1]]$children[[k]]$gp$alpha <- .6
}
plot(g)
getCI <- function(time, status, myscore, mydf) {
ff <- as.formula(paste("Surv(",time,",",status,")~",myscore))
res <- survConcordance(ff, data=mydf)
return(list(ci=res$concordance, se=res$std.err))
}
ww <- dd
ww$vcat <- as.numeric(car:::recode(ww$IPSSM,
"'Very-Low'='0'; 'Low'='1';
'Moderate-Low'='2';
'Moderate-High'='2';
'High'='3'; 'Very-High'='4'"))
ww$icat <- as.numeric(car:::recode(ww$IPSSR,
"'Very-Low'='0'; 'Low'='1';
'Int'='2';
'High'='3'; 'Very-High'='4'"))
ww.active <- ww[!is.na(ww$IPSSM_SCORE) & !is.na(ww$IPSSR_SCORE),]
# IPSS-M SCORE
# TRAIN
ci.train.free.sample <- getCI(time="LFS_YEARS", status="LFS_STATUS", myscore="IPSSM_SCORE", mydf=ww.active)
ci.train.os.sample <- getCI(time="OS_YEARS", status="OS_STATUS", myscore="IPSSM_SCORE", mydf=ww.active)
ci.train.aml.sample <- getCI(time="AMLt_YEARS", status="AMLt_STATUS", myscore="IPSSM_SCORE", mydf=ww.active)
dci.mol.score <- data.frame(
CI=c(
ci.train.free.sample$ci,ci.train.os.sample$ci,ci.train.aml.sample$ci
),
se=c(
ci.train.free.sample$se,ci.train.os.sample$se,ci.train.aml.sample$se
),
endpoint=rep(c("Leukemia-free survival","Overall survival","Leukemic transformation"),each=1)
)
dci.mol.score$model <- "IPSS-M"
dci.mol.score$type <- "score"
# IPSS-R SCORE
# TRAIN
ci.train.free.sample <- getCI(time="LFS_YEARS", status="LFS_STATUS", myscore="IPSSR_SCORE", mydf=ww.active)
ci.train.os.sample <- getCI(time="OS_YEARS", status="OS_STATUS", myscore="IPSSR_SCORE", mydf=ww.active)
ci.train.aml.sample <- getCI(time="AMLt_YEARS", status="AMLt_STATUS", myscore="IPSSR_SCORE", mydf=ww.active)
dci.ipss.score <- data.frame(
CI=c(
ci.train.free.sample$ci,ci.train.os.sample$ci,ci.train.aml.sample$ci
),
se=c(
ci.train.free.sample$se,ci.train.os.sample$se,ci.train.aml.sample$se
),
endpoint=rep(c("Leukemia-free survival","Overall survival","Leukemic transformation"),each=1)
)
dci.ipss.score$model <- "IPSS-R"
dci.ipss.score$type <- "score"
# IPSS-M CAT
# TRAIN
ci.train.free.sample <- getCI(time="LFS_YEARS", status="LFS_STATUS", myscore="vcat", mydf=ww.active)
ci.train.os.sample <- getCI(time="OS_YEARS", status="OS_STATUS", myscore="vcat", mydf=ww.active)
ci.train.aml.sample <- getCI(time="AMLt_YEARS", status="AMLt_STATUS", myscore="vcat", mydf=ww.active)
dci.mol.cat <- data.frame(
CI=c(
ci.train.free.sample$ci,ci.train.os.sample$ci,ci.train.aml.sample$ci
),
se=c(
ci.train.free.sample$se,ci.train.os.sample$se,ci.train.aml.sample$se
),
endpoint=rep(c("Leukemia-free survival","Overall survival","Leukemic transformation"),each=1)
)
dci.mol.cat$model <- "IPSS-M"
dci.mol.cat$type <- "category"
# IPSS-R CAT
# TRAIN
ci.train.free.sample <- getCI(time="LFS_YEARS", status="LFS_STATUS", myscore="icat", mydf=ww.active)
ci.train.os.sample <- getCI(time="OS_YEARS", status="OS_STATUS", myscore="icat", mydf=ww.active)
ci.train.aml.sample <- getCI(time="AMLt_YEARS", status="AMLt_STATUS", myscore="icat", mydf=ww.active)
dci.ipss.cat <- data.frame(
CI=c(
ci.train.free.sample$ci,ci.train.os.sample$ci,ci.train.aml.sample$ci
),
se=c(
ci.train.free.sample$se,ci.train.os.sample$se,ci.train.aml.sample$se
),
endpoint=rep(c("Leukemia-free survival","Overall survival","Leukemic transformation"),each=1)
)
dci.ipss.cat$model <- "IPSS-R"
dci.ipss.cat$type <- "category"
dci <- rbind(dci.mol.score, dci.mol.cat,
dci.ipss.score, dci.ipss.cat
)
dci$model <- factor(dci$model, levels=c("IPSS-R","IPSS-M"))
dci$endpoint <- factor(dci$endpoint, levels=c("Leukemia-free survival","Overall survival","Leukemic transformation"))
col.fig3a <- met.brewer("Cassatt2")[c(4,7)]
ggci <- ggplot(dci, aes(x=model,y=CI,color=type,shape=type)) +
geom_point(stat="identity",size=4,position=position_dodge(width=0.2)) +
geom_errorbar(aes(ymin=CI-1.96*se,ymax=CI+1.96*se),width=0.15,position=position_dodge(width=0.2),size=1) +
#scale_y_continuous(limits=c(.68,.81),breaks = seq(.68,.81,0.01)) +
scale_y_continuous(breaks = seq(.6,.85,0.01)) +
theme_light() + gtheme(18) +
xlab("Model") + ylab("C-index (95% CI)") +
scale_color_manual(values=col.fig3a,name="Encoding", labels=c("5 categories","score")) +
scale_shape_manual(values=c(15,19),name="Encoding", labels=c("5 categories","score")) +
topleg +
facet_wrap(.~endpoint, scales="free_y") +
theme(strip.background =element_rect(fill="grey45")) +
theme(strip.text = element_text(colour = 'white')) +
theme(legend.key.width = unit(1.5,"cm"))
options(repr.plot.width=12, repr.plot.height=6)
ggci
aa <- dd[!is.na(dd$IPSSR) & !is.na(dd$IPSSM),]
nrow(aa)
ta <- table(aa$IPSSR)
aa$IPSSR4 <- paste0(aa$IPSSR," (", ta[aa$IPSSR], ")")
tl <- sort(unique(aa$IPSSR4))[c(5,3,2,1,4)]
sort(unique(aa$IPSSR4))
aa$IPSSR4 <- factor(aa$IPSSR4,levels=tl)
t <- table(aa$IPSSR, aa$IPSSM)
t
aa$tmp.cat <- as.vector(aa$IPSSM)
aa$tmp.cat[aa$tmp.cat%in%c("Moderate-Low","Moderate-High")] <- "Int"
aa$tmp.cat <- factor(aa$tmp.cat, levels=levels(aa$IPSSR))
tr <- table(aa$IPSSR, aa$tmp.cat)
ttr <- tr # for re-stratification
diag(ttr) <- 0
hhr <- ttr # f
ttr
tr <- table(aa$IPSSR, aa$tmp.cat)
ttr <- tr # for re-stratification
diag(ttr) <- 0
hhr <- ttr # for re-stratification with shift > 1
hhr["Very-Low","Low"] <- 0
hhr["Low","Very-Low"] <- 0
hhr["Low","Int"] <- 0
hhr["Int","Low"] <- 0
hhr["Int","High"] <- 0
hhr["High","Int"] <- 0
hhr["High","Very-High"] <- 0
hhr["Very-High","High"] <- 0
ttr.upstage <- ttr
ttr.upstage[lower.tri(ttr.upstage)] <- 0
ttr.downstage <- ttr
ttr.downstage[upper.tri(ttr.downstage)] <- 0
hhr.upstage <- hhr
hhr.upstage["Int","Very-Low"] <- 0
hhr.upstage["High","Low"] <- 0
hhr.upstage["Very-High","Int"] <- 0
hhr.downstage <- hhr
hhr.downstage["Very-Low","Int"] <- 0
hhr.downstage["Very-Low","High"] <- 0
hhr.downstage["Low","High"] <- 0
hhr.downstage["Low","Very-High"] <- 0
hhr.downstage["Int","Very-High"] <- 0
hr <- data.frame(IPSSR=levels(aa$IPSSR),
IPSSR4=levels(aa$IPSSR4),
reclassify_percent = 100*apply(ttr,1,sum)/apply(tr,1,sum),
reclassify_down_percent = 100*apply(ttr.downstage,1,sum)/apply(tr,1,sum),
reclassify_up_percent = 100*apply(ttr.upstage,1,sum)/apply(tr,1,sum),
reclassify_n = apply(ttr,1,sum),
reclassify_high_percent = 100*apply(hhr,1,sum)/apply(tr,1,sum),
reclassify_high_down_percent = 100*apply(hhr.downstage,1,sum)/apply(tr,1,sum),
reclassify_high_up_percent = 100*apply(hhr.upstage,1,sum)/apply(tr,1,sum),
reclassify_high_n = apply(hhr,1,sum)
)
hr$IPSSR <- factor(hr$IPSSR, levels=hr$IPSSR)
q1 <- ggplot(aa,aes(fill=IPSSM,x=IPSSR4)) +
geom_bar(position="fill", color="black",width=.84) +
theme_classic() + gtheme(18) + theme(axis.line=element_blank(), axis.ticks.y = element_blank()) +
topleg +
scale_fill_manual(name="IPSS-M", values=ggcol) +
scale_y_continuous(breaks=c(0,.25,.50,.75,1),labels=c(0,25,50,75,100)) +
xlab("IPSS-R") + ylab("%") + coord_flip()
hr1 <- hr[,c("IPSSR","reclassify_down_percent")]
colnames(hr1) <- c("IPSSR","rr")
hr1$type <- "downstaged"
hr2 <- hr[,c("IPSSR","reclassify_up_percent")]
colnames(hr2) <- c("IPSSR","rr")
hr2$type <- "upstaged"
hr12 <- rbind(hr1,hr2)
qq2 <- ggplot(hr12,aes(y=rr,x=IPSSR,fill=type)) +
geom_bar(stat="identity",color="black",width=.84) +
theme_classic() + gtheme(18) + theme(axis.line=element_blank(), axis.ticks.y = element_blank()) +
scale_fill_manual(name="re-stratification", values=c("grey65","grey35")) +
guides(fill=guide_legend(nrow=2,byrow=TRUE)) +
topleg +
scale_y_continuous(breaks=seq(0,80,20),limits=c(0,70)) +
noytitle + noylabel + ylab("% re-stratified") + coord_flip()
#qq2
hhr1 <- hr[,c("IPSSR","reclassify_high_down_percent")]
colnames(hhr1) <- c("IPSSR","rr")
hhr1$type <- "downstaged"
hhr2 <- hr[,c("IPSSR","reclassify_high_up_percent")]
colnames(hhr2) <- c("IPSSR","rr")
hhr2$type <- "upstaged"
hhr12 <- rbind(hhr1,hhr2)
qq3 <- ggplot(hhr12,aes(y=rr,x=IPSSR,fill=type)) +
geom_bar(stat="identity",color="black",width=.84) +
theme_classic() + gtheme(18) + theme(axis.line=element_blank(), axis.ticks.y = element_blank()) +
scale_fill_manual(name="re-stratification", values=c("grey65","grey35")) +
guides(fill=guide_legend(nrow=2,byrow=TRUE)) +
topleg +
scale_y_continuous(breaks=seq(0,12,2),limits=c(0,13)) +
noytitle + noylabel + ylab("% re-stratified with >1 shifts") + coord_flip()
#qq3
options(repr.plot.width=16, repr.plot.height=3)
gre <- ggarrange(q1,ggarrange(qq2,qq3,common.legend=T),
widths=c(3,2),ncol=2)
gre
We also show the KM stratified by IPSS-M per IPSS-R strata, and vice-versa, below:
listg1 <- lapply( levels(aa$IPSSR) , function(cc) {
hmp <- aa[which(aa$IPSSR==cc),]
t <- table(hmp$IPSSM)
cckeep <- names(t)[which(t>5)]
hmp <- hmp[hmp$IPSSM%in%cckeep,]
hmp$IPSSM <- factor(hmp$IPSSM, levels=cckeep)
ff = as.formula(paste("Surv(LFS_YEARS,LFS_STATUS)~","IPSSM"))
kmfit = survfit(ff,data=hmp)
kmfit$call$formula <- ff
myleg = paste0(gsub("IPSSM=","",names(kmfit$strata))," (",kmfit$n,")")
g <- ggsurvplot(kmfit, data=hmp,
color="IPSSM",
legend.labs=myleg,
legend.title="IPSS-M",
size=1.1, conf.int=F,
xlim=c(0,10), break.time.by=1,
pval=T, pval.coord=c(7.7, 0.99), pval.size=7,
surv.median.line="hv",
)$plot +
theme_classic() + gtheme(22) + topleg +
theme(legend.key.width = unit(1.1,"cm")) +
facet_grid(.~paste("IPSS-R",cc)) +
scale_color_manual(name="IPSS-M", values=ggcol[which(t>5)]) +
noxtitle + noytitle + guides(color=guide_legend(nrow=2)) +
guides(color = guide_legend(override.aes = list(shape = NA))) +
guides(color=guide_legend(nrow=6, ))
return(g)
}
)
options(repr.plot.width=30, repr.plot.height=9)
figure <- ggarrange(plotlist=listg1, common.legend = F, ncol=5)
fre <- annotate_figure(figure,
left = text_grob("LFS probability", rot = 90, size=22),
bottom = text_grob("Years", rot = 0, size=22),
top = ""
)
fre
aa$IPSSM_s <- as.vector(aa$IPSSM)
aa$IPSSM_s[aa$IPSSM %in% c("Moderate-Low","Moderate-High")] <- "Moderate"
aa$IPSSM_s <- factor(aa$IPSSM_s, levels=c("Very-Low","Low","Moderate","High","Very-High"))
listg1 <- lapply( levels(aa$IPSSM_s) , function(cc) {
hmp <- aa[which(aa$IPSSM_s==cc),]
t <- table(hmp$IPSSR)
cckeep <- names(t)[which(t>5)]
hmp <- hmp[hmp$IPSSR%in%cckeep,]
hmp$IPSSR <- factor(hmp$IPSSR, levels=cckeep)
ff = as.formula(paste("Surv(LFS_YEARS,LFS_STATUS)~","IPSSR"))
kmfit = survfit(ff,data=hmp)
kmfit$call$formula <- ff
myleg = paste0(gsub("IPSSR=","",names(kmfit$strata))," (",kmfit$n,")")
g <- ggsurvplot(kmfit, data=hmp,
color="IPSSR",
legend.labs=myleg,
legend.title="IPSS-M",
size=1.1, conf.int=F,
xlim=c(0,10), break.time.by=1,
pval=T, pval.coord=c(7.8, 0.99), pval.size=7,
surv.median.line="hv",
)$plot +
theme_classic() + gtheme(22) + topleg +
theme(legend.key.width = unit(1.1,"cm")) +
facet_grid(.~paste("IPSS-M",cc)) +
scale_color_manual(name="IPSS-R", values=col.ipssr[which(t>5)]) +
noxtitle + noytitle + guides(color=guide_legend(nrow=2)) +
guides(color = guide_legend(override.aes = list(shape = NA))) +
guides(color=guide_legend(nrow=6))
return(g)
}
)
options(repr.plot.width=30, repr.plot.height=8.5)
figure <- ggarrange(plotlist=listg1, common.legend = F, ncol=5)
fre2 <- annotate_figure(figure,
left = text_grob("LFS probability", rot = 90, size=22),
bottom = text_grob("Years", rot = 0, size=22),
top = ""
)
fre2
aa <- dd[ !is.na(dd$IPSSM_SCORE) & !is.na(dd$IPSSR_SCORE) , ]
#Note that
sum(!is.na(dd$IPSSM_SCORE) & !is.na(dd$IPSSR_SCORE)) # score
#and that
sum(!is.na(dd$IPSSM) & !is.na(dd$IPSSR)) # category
The total number of patients for Fig 3B is 2678 (as we took all cases with IPSSM and R categories), while the total number of patients for Fig 3C is 2649 (as we took all cases with IPSSM and R score).
The reason there is a small difference is because for a few patients we could calculate the IPSSR category with certainty but not the IPSSR score (eg missing ANC).
Agree this is slightly unfortunate and we should probably have used the same total number of patients in both Fig 3B and 3C...
# Simplified Categories
l1 <- c("(Very)Low","Int","(Very)High")
aa$simple.ipssr <- car:::recode(aa$IPSSR,
"'Very-Low'='(Very)Low'; 'Low'='(Very)Low'; 'High'='(Very)High'; 'Very-High'='(Very)High'")
aa$simple.ipssr <- factor(aa$simple.ipssr, levels=l1)
table(aa$simple.ipssr)
l2 <- c("(Very)Low","Moderate","(Very)High")
aa$simple.ipssm <- car:::recode(aa$IPSSM,
"'Very-Low'='(Very)Low'; 'Low'='(Very)Low'; 'Moderate-Low'='Moderate'; 'Moderate-High'='Moderate'; 'High'='(Very)High'; 'Very-High'='(Very)High'")
aa$simple.ipssm <- factor(aa$simple.ipssm, levels=l2)
table(aa$simple.ipssm)
genes.adverse.ipssm <- c("FLT3","TP53multi","NPM1","ETV6","MLL_PTD","KRAS","IDH2","NRAS","CBL","EZH2","U2AF1","RUNX1","DNMT3A","SRSF2","ASXL1","SF3B1_5q")
aa$FLT3[aa$FLT3_ITD==1] <- 1
aa$Nmain <- apply( aa[,genes.adverse.ipssm] , 1 , sum )
aa$Nmain.cat <- aa$Nmain
aa$Nmain.cat[aa$Nmain.cat>=3] = "3 or more"
gil <- lapply(levels(aa$simple.ipssr), function(iii) {
aai <- aa[which(aa$simple.ipssr==iii),]
t <- table(aai$simple.ipssm)
myx <- paste0(names(t)," (",t,")")
col.fig3c <- c(
"#f2f0f7",
"#cbc9e2",
"#9e9ac8",
"#6a51a3"
)
col.fig3c <- met.brewer("Cassatt2")
col.fig3c <- rev(col.fig3c[c(2,3,4,5)])
gi <- ggplot(aai, aes(x=simple.ipssm, fill=Nmain.cat)) +
facet_grid(.~paste0("IPSS-R ",iii," (n=",nrow(aai),")")) +
geom_bar(position="fill",color="black",width=.84) +
theme_light() + gtheme(20) +
theme(strip.background =element_rect(fill="grey45")) +
theme(strip.text = element_text(colour = 'white')) +
scale_y_continuous(breaks=c(0,.25,.50,.75,1),labels=c(0,25,50,75,100)) +
scale_x_discrete(labels=myx) +
noxtitle + noytitle +
scale_fill_manual(name="Number of adverse IPSS-M mutated main effect genes",
values=col.fig3c) +
coord_flip() + topleg
return(gi) })
options(repr.plot.width=18, repr.plot.height=4)
figure <- ggarrange(plotlist=gil, common.legend = T, ncol=3)
gnumtrans <- annotate_figure(figure,
left = text_grob("IPSS-M", rot = 90, size=20),
bottom = text_grob("% of patients within IPSS-M category", rot = 0, size=20),
)
gnumtrans
table(dd$MDS_TYPE, exclude=F)
ddt <- dd[!is.na(dd$MDS_TYPE),]
ddt$type <- gsub("s_t_MDS","s/t-MDS",ddt$MDS_TYPE)
table(ddt$type, exclude=F)
table(ddt$type, ddt$IPSSM, exclude=F)
ddt$IPSSM_s <- as.vector(ddt$IPSSM)
ddt$IPSSM_s[ddt$IPSSM %in% c("Moderate-Low","Moderate-High")] <- "Moderate"
ddt$IPSSM_s <- factor(ddt$IPSSM_s, levels=c("Very-Low","Low","Moderate","High","Very-High"))
options(repr.plot.width=7, repr.plot.height=5)
if (1==1) {
# Overall
ff = as.formula(paste("Surv(LFS_YEARS,LFS_STATUS)~","type"))
kmfit = survfit(ff,data=ddt)
kmfit$call$formula <- ff
myleg = paste0(gsub("type=","",names(kmfit$strata))," (",kmfit$n,")")
c <- coxph(ff,data=ddt)
cc <- summary(c)$conf
ccannot <- paste0("HR=",round(cc[,1],1)," (95% CI ",round(cc[,3],1),"-",round(cc[,4],1),")")
g <- ggsurvplot(kmfit, data=ddt, conf.int=F, size=1.1,
#legend.labs=myleg,
xlim=c(0,10),break.time.by=1,
pval=T, pval.coord=c(7.0, 0.9), pval.size=6.5,
surv.median.line = "hv",color="type",
palette=pal_nejm()(2),
risk.table=TRUE,
risk.tables.y.text=TRUE,
risk.tables.y.text.col=TRUE,
risk.table.title="",risk.table.fontsize=5)
gf <- g$plot + theme_classic() + gtheme(20) + topleg +
facet_grid(.~paste("Not Stratified")) +
theme(legend.key.width = unit(1.1,"cm")) +
noxtitle + noytitle + guides(color=guide_legend(nrow=2)) +
guides(color = guide_legend(override.aes = list(shape = NA))) +
annotate("label",label=ccannot,x=6.5,y=.99,size=6.5,label.size = NA) +
annotate("text", label="primary", x=9, y=0.32, size=6.5, color=pal_nejm()(2)[1]) +
annotate("text", label="s/t-MDS", x=9, y=0.09, size=6.5, color=pal_nejm()(2)[2])
gtf <- g$table + gtheme(18) +
theme(axis.line=element_blank(), axis.ticks.x=element_blank(),axis.text.x=element_blank()) +
noxtitle + noytitle + noleg
p0 <- get_legend(gf)
ggfree0 <- ggarrange(gf+noleg, gtf+theme(plot.margin = unit(c(-0.5, 0, 0, -0.3), "cm")),
ncol=1, nrow=2, heights=c(5,1))
}
ggcol2 <- ggcol[-4]
# Per Strata
listg <- lapply( 1:length(levels(ddt$IPSSM_s)) , function(ii) {
cc <- levels(ddt$IPSSM_s)[ii]
print(cc)
hmp <- ddt[which(ddt$IPSSM_s==cc),]
t <- table(hmp$IPSSM_s)
cckeep <- names(t)[which(t>5)]
hmp <- hmp[hmp$IPSSM_s%in%cckeep,]
hmp$risk.cat <- factor(hmp$IPSSM_s, levels=cckeep)
ff = as.formula(paste("Surv(LFS_YEARS,LFS_STATUS)~","type"))
kmfit = survfit(ff,data=hmp)
kmfit$call$formula <- ff
myleg = paste0(gsub("type=","",names(kmfit$strata))," (",kmfit$n,")")
lc <- coxph(ff,data=hmp)
lcc <- summary(lc)$conf
lccannot <- paste0("HR=",round(lcc[,1],1)," (95% CI ",round(lcc[,3],1),"-",round(lcc[,4],1),")")
#print(lccannot)
g <- ggsurvplot(kmfit, data=hmp, conf.int=F, size=1.1,
#legend.labs=myleg,
xlim=c(0,10),break.time.by=1,
pval=T, pval.coord=c(7.0, 0.9), pval.size=6.5,
surv.median.line = "hv",color="type",
palette=pal_nejm()(2),
risk.table=TRUE,
risk.tables.y.text=TRUE,
risk.tables.y.text.col=TRUE,
risk.table.title="",risk.table.fontsize=5)
gf <- g$plot + theme_classic() + gtheme(20) + noleg +
facet_grid(.~paste("IPSS-M",cc)) +
theme(legend.key.width = unit(1.1,"cm")) +
noxtitle + noytitle + guides(color=guide_legend(nrow=2)) +
guides(color = guide_legend(override.aes = list(shape = NA))) +
annotate("label",label=lccannot,x=6.5,y=.99,size=6.5,label.size = NA) +
theme(strip.background=element_rect(fill=alpha(ggcol2[ii],.6)))
if (cc=="Very Low") {
gf <- gf +
annotate("text", label="primary", x=8.8, y=0.66, size=6.5, color=pal_nejm()(2)[1]) +
annotate("text", label="s/t-MDS", x=6, y=0.58, size=6.5, color=pal_nejm()(2)[2])
}
if (cc=="Low") {
gf <- gf +
annotate("text", label="primary", x=8.9, y=0.44, size=6.5, color=pal_nejm()(2)[1]) +
annotate("text", label="s/t-MDS", x=7.4, y=0.29, size=6.5, color=pal_nejm()(2)[2])
}
if (cc=="Moderate") {
gf <- gf +
annotate("text", label="primary", x=9, y=0.2, size=6.5, color=pal_nejm()(2)[1]) +
annotate("text", label="s/t-MDS", x=7.3, y=0.31, size=6.5, color=pal_nejm()(2)[2])
}
if (cc=="High") {
gf <- gf +
annotate("text", label="primary", x=9, y=0.12, size=6.5, color=pal_nejm()(2)[1]) +
annotate("text", label="s/t-MDS", x=5.4, y=0.22, size=6.5, color=pal_nejm()(2)[2])
}
if (cc=="Very High") {
gf <- gf +
annotate("text", label="primary", x=9, y=0.1, size=6.5, color=pal_nejm()(2)[1]) +
annotate("text", label="s/t-MDS", x=2.8, y=0, size=6.5, color=pal_nejm()(2)[2])
}
gtf <- g$table + gtheme(18) +
theme(axis.line=element_blank(), axis.ticks.x=element_blank(),axis.text.x=element_blank()) +
noxtitle + noytitle + noleg
ggfree <- ggarrange(gf, gtf+theme(plot.margin = unit(c(-0.5, 0, 0, -0.06), "cm")),
ncol=1, nrow=2, heights=c(5,1))
return(ggfree)
}
)
listg1= c(list(ggfree0), listg)
figure <- ggarrange(plotlist=listg1, common.legend = F)
gs88 <- annotate_figure(figure,
left = text_grob("LFS probability \n ", rot = 90, size=22),
bottom = text_grob("Years", rot = 0, size=22),
top = ""
)
options(repr.plot.width=17.5, repr.plot.height=14)
gs88
nrow(dd)
table(dd$IPSSM)
sum(table(dd$IPSSM))
table(dd$IPSSM)
table(dd$IPSSM)/2701
sessionInfo()